####
## Project:            Do Corporate Regulations Deter or Stimulate Investment? The Effect of the OECD Anti-Bribery Convention on FDI
## File name:          03-dyad_analysis.R
## Description:        Analyses country-level dyadic data and exports tables and plots with results
## Programmed by:      Lorenzo Crippa
####

# Setup ----
rm(list=ls())
graphics.off()
setwd("") # INSERT THE REPLICATION FOLDER FILE PATH HERE 
set.seed(47293)

library(lme4)
library(sampleSelection)
library(fixest)
library(gsynth)
library(janitor)
library(lemon)
library(cowplot)
library(grid)
library(gridExtra)
library(showtext)
library(modelsummary)
library(texreg)
library(tidyverse)
source("code/aux_functions.R")

font_add("Times New Roman", "times new roman.ttf")
showtext_auto()

options("modelsummary_format_numeric_latex" = "plain")

theme <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
               panel.background = element_rect(fill = "transparent", color = NA),
               plot.background = element_rect(fill = "transparent", color = NA),
               axis.line = element_line(colour = "black", linewidth = 0.3),
               text = element_text(colour = "black", size = 14, family = "Times New Roman"))
theme_set(theme)

# Import data ----
unctad <- read_rds("data/dyad_level.rds")

# Data description ----
# how many home economies?
length(unique(unctad$isocodeHome)) # 101 home countries
length(unique(unctad$isocodeHost[!is.na(unctad$paciHost2005)])) # 109 host countries with known PACI value

length(unique(unctad$ddyad[!is.na(unctad$logflowsAtoB) & !is.na(unctad$paciHost2005)])) # 1765 usable dyads 

## Table G.1 ----
# variables to omit:
omit <- c("year", "ddyad", # IDs
          # time-invariant stuff that gets removed by the dyad-FEs, we can omit it from the table:
          "oecdHome", "oecdHost", 
          "comlang", "colony", "dist" 
)

dat_summary <- unctad %>%
  # remove the non-lagged variables of host country covars from the summary
  select(-fdigdpHost, -gdppcHost, -tradeHost,
         -pcHost, -demHost, -lgdpHost, -ljiHost) %>%
  # remove ID and time-invariant variables:
  select(-all_of(omit)) %>%
  relocate(logflowsAtoB, flowsBinary, ratification, paciHost2005,
           l.fdigdpHost, l.gdppcHost, l.tradeHost, l.pcHost, l.demHost, l.lgdpHost, l.ljiHost,
           gdppcHome, gdpgrowHome, lgdpHome, ljiHome, bit) %>%
  rename(`Dyadic FDI (log)` = "logflowsAtoB",
         `Dyadic FDI (binary)` = "flowsBinary",
         `OECD Convention` = "ratification",
         `Host PACI` = "paciHost2005",
         `Lag Host FDI (GDP %)` = "l.fdigdpHost",
         `Lag Host GDP per capita` = "l.gdppcHost",
         `Lag Host Trade (GDP %)` = "l.tradeHost",
         `Lag Host POLCON III` = "l.pcHost",
         `Lag Host Democracy` = "l.demHost",
         `Lag Host GDP (log)` = "l.lgdpHost",
         `Lag Host Judiciary Indep.` = "l.ljiHost",
         `Home GDP per capita` = "gdppcHome",
         `Home GDP growth (%)` = "gdpgrowHome",
         `Home GDP (log)` = "lgdpHome",
         `Home Judiciary Indep.` = "ljiHome",
         `Dyad BIT` = "bit")

datasummary(data = dat_summary,
            formula = All(dat_summary) ~ N + Mean + SD + Min + Max,
            fmt = 3,
            output = "tables/supp_tab_g1.tex",
            title = "Dyadic country-level data. Summary statistics")

# Analysis ----
## Binning estimator ----
values <- quantile(unctad$paciHost2005, probs = seq(0, 1, 0.20), na.rm = T)
values <- values[values != 0]

# how many dyads per each bin?
unctad %>%
  mutate(group = case_when(paciHost2005 >= 0 & paciHost2005 <= values[1] ~ 1,
                           paciHost2005 > values[1] & paciHost2005 <= values[2] ~ 2,
                           paciHost2005 > values[2] & paciHost2005 <= values[3] ~ 3,
                           paciHost2005 > values[3] & paciHost2005 <= values[4] ~ 4,
                           paciHost2005 > values[4] & paciHost2005 <= values[5] ~ 5,
                           is.na(paciHost2005) ~ NA_real_,
                           TRUE ~ 0)) %>%
  filter(is.na(logflowsAtoB) == FALSE) %>% # drop missing observations for the DV
  group_by(ddyad) %>%
  mutate(group = unique(group)) %>%
  filter(row_number() == 1) %>%
  group_by(group) %>%
  summarise(n = n()) %>%
  filter(is.na(group) == FALSE) %>%
  adorn_totals("row")

### Generalised Synthetic Control: Main results ----
#### Bin 1 ----
out1_main <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "l.fdigdpHost",
                          "l.gdppcHost", "l.tradeHost", "l.pcHost", "l.lgdpHost", "l.ljiHost"),
                    data = unctad %>% filter(paciHost2005 >= 0 & paciHost2005 <= values[1]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Figure 4.a ----
plot(out1_main, type = "counterfactual", raw = "none", main="", shade.post = FALSE) + 
  geom_vline(xintercept = 0, col = I("grey50")) + ylab("Average dyadic FDI") +
  xlab("Time relative to treatment") +
  scale_size_manual(values = c(0.8, 0.8), breaks = c("co", "tr"),
                    labels = c("Synthetic control", "Observed treated")) +
  scale_color_manual(values = c("black", "black"), breaks = c("co", "tr"),
                     labels = c("Synthetic control", "Observed treated")) +
  scale_linetype_manual(values = c("dotted", "solid"), breaks = c("co", "tr"),
                        labels = c("Synthetic control", "Observed treated")) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        legend.background = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4a.pdf", device = "pdf", width = 8, height = 5)

##### Figure 4.b ----
plot(out1_main, main = "") + geom_vline(xintercept = 0, col = I("grey50")) + 
  geom_hline(yintercept = 0, col = I("grey50")) + 
  ylab("ATT and 95% confidence intervals") + xlab("Host PACI") +
  xlab("Time relative to treatment") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4b.pdf", device = "pdf", width = 8, height = 5)

#### Bin 2 ----
out2_main <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "l.fdigdpHost",
                          "l.gdppcHost", "l.tradeHost", "l.pcHost", "l.lgdpHost", "l.ljiHost"),
                    data = unctad %>% filter(paciHost2005 > values[1] & paciHost2005 <= values[2]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE,
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Figure 4.c ----
plot(out2_main, type = "counterfactual", raw = "none", main="", shade.post = FALSE) + 
  geom_vline(xintercept = 0, col = I("grey50")) + ylab("Average dyadic FDI") +
  xlab("Time relative to treatment") +
  scale_size_manual(values = c(0.8, 0.8), breaks = c("co", "tr"),
                    labels = c("Synthetic control", "Observed treated")) +
  scale_color_manual(values = c("black", "black"), breaks = c("co", "tr"),
                     labels = c("Synthetic control", "Observed treated")) +
  scale_linetype_manual(values = c("dotted", "solid"), breaks = c("co", "tr"),
                        labels = c("Synthetic control", "Observed treated")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        legend.background = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4c.pdf", device = "pdf", width = 8, height = 5)

##### Figure 4.d ----
plot(out2_main, main = "") + geom_vline(xintercept = 0, col = I("grey50")) + 
  geom_hline(yintercept = 0, col = I("grey50")) + 
  ylab("ATT and 95% confidence intervals") + xlab("Host PACI") +
  xlab("Time relative to treatment") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4d.pdf", device = "pdf", width = 8, height = 5)

#### Bin 3 ----
out3_main <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "l.fdigdpHost",
                          "l.gdppcHost", "l.tradeHost", "l.pcHost", "l.lgdpHost", "l.ljiHost"),
                    data = unctad %>% filter(paciHost2005 > values[2] & paciHost2005 <= values[3]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Figure 4.e ----
plot(out3_main, type = "counterfactual", raw = "none", main="", shade.post = FALSE) + 
  geom_vline(xintercept = 0, col = I("grey50")) + ylab("Average dyadic FDI") +
  xlab("Time relative to treatment") +
  scale_size_manual(values = c(0.8, 0.8), breaks = c("co", "tr"),
                    labels = c("Synthetic control", "Observed treated")) +
  scale_color_manual(values = c("black", "black"), breaks = c("co", "tr"),
                     labels = c("Synthetic control", "Observed treated")) +
  scale_linetype_manual(values = c("dotted", "solid"), breaks = c("co", "tr"),
                        labels = c("Synthetic control", "Observed treated")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        legend.background = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4e.pdf", device = "pdf", width = 8, height = 5)

##### Figure 4.f ----
plot(out3_main, main = "") + geom_vline(xintercept = 0, col = I("grey50")) + 
  geom_hline(yintercept = 0, col = I("grey50")) + 
  ylab("ATT and 95% confidence intervals") + xlab("Host PACI") +
  xlab("Time relative to treatment") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4f.pdf", device = "pdf", width = 8, height = 5)

#### Bin 4 ----
out4_main <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "l.fdigdpHost",
                          "l.gdppcHost", "l.tradeHost", "l.pcHost", "l.lgdpHost", "l.ljiHost"),
                    data = unctad %>% filter(paciHost2005 >= values[3] & paciHost2005 <= values[4]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Figure 4.g ----
plot(out4_main, type = "counterfactual", raw = "none", main="", shade.post = FALSE) + 
  geom_vline(xintercept = 0, col = I("grey50")) + ylab("Average dyadic FDI") +
  xlab("Time relative to treatment") +
  scale_size_manual(values = c(0.8, 0.8), breaks = c("co", "tr"),
                    labels = c("Synthetic control", "Observed treated")) +
  scale_color_manual(values = c("black", "black"), breaks = c("co", "tr"),
                     labels = c("Synthetic control", "Observed treated")) +
  scale_linetype_manual(values = c("dotted", "solid"), breaks = c("co", "tr"),
                        labels = c("Synthetic control", "Observed treated")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        legend.background = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4g.pdf", device = "pdf", width = 8, height = 5)

##### Figure 4.h ----
plot(out4_main, main = "") + geom_vline(xintercept = 0, col = I("grey50")) + 
  geom_hline(yintercept = 0, col = I("grey50")) +
  ylab("ATT and 95% confidence intervals") + xlab("Host PACI") +
  xlab("Time relative to treatment") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4h.pdf", device = "pdf", width = 8, height = 5)

#### Bin 5 ----
out5_main <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "l.fdigdpHost",
                          "l.gdppcHost", "l.tradeHost", "l.pcHost", "l.lgdpHost", "l.ljiHost"),
                    data = unctad %>% filter(paciHost2005 > values[4] & paciHost2005 <= values[5]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = FALSE,
                    EM = TRUE)

##### Figure 4.i ----
plot(out5_main, type = "counterfactual", raw = "none", main="", shade.post = FALSE) + 
  geom_vline(xintercept = 0, col = I("grey50")) + ylab("Average dyadic FDI") +
  xlab("Time relative to treatment") +
  scale_size_manual(values = c(0.8, 0.8), breaks = c("co", "tr"),
                    labels = c("Synthetic control", "Observed treated")) +
  scale_color_manual(values = c("black", "black"), breaks = c("co", "tr"),
                     labels = c("Synthetic control", "Observed treated")) +
  scale_linetype_manual(values = c("dotted", "solid"), breaks = c("co", "tr"),
                        labels = c("Synthetic control", "Observed treated")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        legend.background = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4i.pdf", device = "pdf", width = 8, height = 5)

##### Figure 4.j ----
plot(out5_main, main = "") + geom_vline(xintercept = 0, col = I("grey50")) + 
  geom_hline(yintercept = 0, col = I("grey50")) + 
  ylab("ATT and 95% confidence intervals") + xlab("Host PACI") +
  xlab("Time relative to treatment") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        panel.border = element_blank(),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        text = element_text(colour = "black", size = 14, family = "Times New Roman"),
        axis.text.x = element_text(size = 20), axis.title.x = element_text(size = 20),
        axis.text.y = element_text(size = 20), axis.title.y = element_text(size = 20),
        legend.text = element_text(size = 20))
ggsave("plots/main_fig_4j.pdf", device = "pdf", width = 8, height = 5)

#### Figure 5 ----
estimates <- data.frame(
  bin = 1:5,
  att = rep(NA, 5),
  se = rep(NA, 5),
  ci.lo = rep(NA, 5),
  ci.hi = rep(NA, 5),
  n = rep(NA, 5),
  n.treat = rep(NA, 5),
  n.contr = rep(NA, 5)
)

estimates$att[estimates$bin == 1] <- out1_main$att.avg
estimates$se[estimates$bin == 1] <- out1_main$est.avg[2]
estimates$ci.lo[estimates$bin == 1] <- out1_main$est.avg[3]
estimates$ci.hi[estimates$bin == 1] <- out1_main$est.avg[4]
estimates$n[estimates$bin == 1] <- out1_main$N
estimates$n.treat[estimates$bin == 1] <- out1_main$Ntr
estimates$n.contr[estimates$bin == 1] <- out1_main$Nco

estimates$att[estimates$bin == 2] <- out2_main$att.avg
estimates$se[estimates$bin == 2] <- out2_main$est.avg[2]
estimates$ci.lo[estimates$bin == 2] <- out2_main$est.avg[3]
estimates$ci.hi[estimates$bin == 2] <- out2_main$est.avg[4]
estimates$n[estimates$bin == 2] <- out2_main$N
estimates$n.treat[estimates$bin == 2] <- out2_main$Ntr
estimates$n.contr[estimates$bin == 2] <- out2_main$Nco

estimates$att[estimates$bin == 3] <- out3_main$att.avg
estimates$se[estimates$bin == 3] <- out3_main$est.avg[2]
estimates$ci.lo[estimates$bin == 3] <- out3_main$est.avg[3]
estimates$ci.hi[estimates$bin == 3] <- out3_main$est.avg[4]
estimates$n[estimates$bin == 3] <- out3_main$N
estimates$n.treat[estimates$bin == 3] <- out3_main$Ntr
estimates$n.contr[estimates$bin == 3] <- out3_main$Nco

estimates$att[estimates$bin == 4] <- out4_main$att.avg
estimates$se[estimates$bin == 4] <- out4_main$est.avg[2]
estimates$ci.lo[estimates$bin == 4] <- out4_main$est.avg[3]
estimates$ci.hi[estimates$bin == 4] <- out4_main$est.avg[4]
estimates$n[estimates$bin == 4] <- out4_main$N
estimates$n.treat[estimates$bin == 4] <- out4_main$Ntr
estimates$n.contr[estimates$bin == 4] <- out4_main$Nco

estimates$att[estimates$bin == 5] <- out5_main$att.avg
estimates$se[estimates$bin == 5] <- out5_main$est.avg[2]
estimates$ci.lo[estimates$bin == 5] <- out5_main$est.avg[3]
estimates$ci.hi[estimates$bin == 5] <- out5_main$est.avg[4]
estimates$n[estimates$bin == 5] <- out5_main$N
estimates$n.treat[estimates$bin == 5] <- out5_main$Ntr
estimates$n.contr[estimates$bin == 5] <- out5_main$Nco

# position point estimates in the middle of each used interval of paciHost
estimates <- estimates %>%
  mutate(interval = c(values[1], values[2], values[3], values[4], values[5]),
         paci = case_when(interval == values[1] ~ 0+(values[1]-0)/2,
                          interval == values[2] ~ values[1]+(values[2]-values[1])/2,
                          interval == values[3] ~ values[2]+(values[3]-values[2])/2,
                          interval == values[4] ~ values[3]+(values[4]-values[3])/2,
                          interval == values[5] ~ values[4]+(values[5]-values[4])/2, 
                          TRUE ~ NA_real_))

shift <- 2 # shift estimates up
enlarger <- 4 # factor to multiply histogram and make it larger
estimates %>%
  mutate(paciHost2005 = values,
         paci = case_when(paciHost2005 == values[1] ~ 0+(values[1]-0)/2,
                          paciHost2005 == values[2] ~ values[1]+(values[2]-values[1])/2,
                          paciHost2005 == values[3] ~ values[2]+(values[3]-values[2])/2,
                          paciHost2005 == values[4] ~ values[3]+(values[4]-values[3])/2,
                          paciHost2005 == values[5] ~ values[4]+(values[5]-values[4])/2,
                          TRUE ~ NA_real_),
         ci.lo90 = att - qnorm(0.95)*se,
         ci.hi90 = att + qnorm(0.95)*se,
         ci.lo99 = att - qnorm(0.995)*se,
         ci.hi99 = att + qnorm(0.995)*se) %>%
  ggplot() + 
  geom_hline(yintercept = 0 + shift, size = 0.2, col = I("grey")) +
  geom_point(mapping = aes(x = paci, y = att + shift)) + 
  geom_pointrange(aes(x = paci, y = att + shift, ymin = ci.lo + shift, ymax = ci.hi + shift),
                  fatten = 2, size = 1) +
  geom_text(aes(x = paci + 0.6, y = att + 0.2 + shift, 
                label = paste0("N = ", as.character(n))),
            size = 3, family = "Times New Roman") +
  ylab("Estimated ATT and 95% confidence intervals") + xlab("Host PACI") +
  geom_histogram(data = unctad %>%
                   # make sure to keep only dyads that enter in the analysis when plotting the histogram:
                   filter(ddyad %in% c(out1_main$id, out2_main$id,
                                       out3_main$id, out4_main$id, out5_main$id)),
                 aes(x = paciHost2005, y = enlarger*((after_stat(count))/sum(after_stat(count))))) +
  scale_y_continuous(labels = function(x) x - shift, breaks = c(shift-2, shift-1.5, shift-1, shift-.5, shift, shift+.5, shift+1))
dev.copy("plots/main_fig_5.pdf", device = pdf, width = 5, height = 5)
dev.off()

# effect sizes in Million of USD for treated dyads:
effects <- exp(abs(estimates$att)) * sign(estimates$att)
effects
# [1]  1.391824  2.011202  1.827358 -1.040185 -2.081684

# What is the average net flow towards dyads in each of these bins before the treatment?
baseline <- exp(c(mean(unctad$logflowsAtoB[unctad$year < 1999 & unctad$paciHost2005 < values[1]], na.rm = TRUE),
                  mean(unctad$logflowsAtoB[unctad$year < 1999 & unctad$paciHost2005 >= values[1] & unctad$paciHost2005 < values[2]], na.rm = TRUE),
                  mean(unctad$logflowsAtoB[unctad$year < 1999 & unctad$paciHost2005 >= values[2] & unctad$paciHost2005 < values[3]], na.rm = TRUE),
                  mean(unctad$logflowsAtoB[unctad$year < 1999 & unctad$paciHost2005 >= values[3] & unctad$paciHost2005 < values[4]], na.rm = TRUE),
                  mean(unctad$logflowsAtoB[unctad$year < 1999 & unctad$paciHost2005 >= values[4] & unctad$paciHost2005 < values[5]], na.rm = TRUE)))
baseline
# [1] 69.294277 95.280793 20.690730 22.052169  9.425471
effects/baseline*100
# [1]   2.008569   2.110815   8.831773  -4.716928 -22.085728

### Generalysed Synthetic Control: Robustness tests ----
#### 3 bins ----
values <- quantile(unctad$paciHost2005, probs = seq(0, 1, 1/3), na.rm = T)
values <- values[values != 0]

##### Bin 1 ----
out1_3bin <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "fdigdpHost", 
                          "gdppcHost", "tradeHost", "pcHost", "lgdpHost", "ljiHost"), 
                    data = unctad %>% filter(paciHost2005 >= 0 & paciHost2005 <= values[1]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)
##### Bin 2 ----
out2_3bin <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "fdigdpHost", 
                          "gdppcHost", "tradeHost", "pcHost", "lgdpHost", "ljiHost"), 
                    data = unctad %>% filter(paciHost2005 >= values[1] & paciHost2005 <= values[2]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Bin 3 ----
out3_3bin <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "fdigdpHost", 
                          "gdppcHost", "tradeHost", "pcHost", "lgdpHost", "ljiHost"), 
                    data = unctad %>% filter(paciHost2005 >= values[2] & paciHost2005 <= values[3]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Figure H.1 ----
estimates <- data.frame(
  bin = 1:3,
  att = rep(NA, 3),
  se = rep(NA, 3),
  ci.lo = rep(NA, 3),
  ci.hi = rep(NA, 3),
  n = rep(NA, 3)
)

estimates$att[estimates$bin == 1] <- out1_3bin$att.avg
estimates$se[estimates$bin == 1] <- out1_3bin$est.avg[2]
estimates$ci.lo[estimates$bin == 1] <- out1_3bin$est.avg[3]
estimates$ci.hi[estimates$bin == 1] <- out1_3bin$est.avg[4]
estimates$n[estimates$bin == 1] <- out1_3bin$N

estimates$att[estimates$bin == 2] <- out2_3bin$att.avg
estimates$se[estimates$bin == 2] <- out2_3bin$est.avg[2]
estimates$ci.lo[estimates$bin == 2] <- out2_3bin$est.avg[3]
estimates$ci.hi[estimates$bin == 2] <- out2_3bin$est.avg[4]
estimates$n[estimates$bin == 2] <- out2_3bin$N

estimates$att[estimates$bin == 3] <- out3_3bin$att.avg
estimates$se[estimates$bin == 3] <- out3_3bin$est.avg[2]
estimates$ci.lo[estimates$bin == 3] <- out3_3bin$est.avg[3]
estimates$ci.hi[estimates$bin == 3] <- out3_3bin$est.avg[4]
estimates$n[estimates$bin == 3] <- out3_3bin$N

shift <- 1.3 # shift estimates up
enlarger <- 4 # factor to multiply histogram and make it larger
estimates %>%
  mutate(paciHost2005 = values,
         paci = case_when(paciHost2005 == values[1] ~ 0+(values[1]-0)/2,
                          paciHost2005 == values[2] ~ values[1]+(values[2]-values[1])/2,
                          paciHost2005 == values[3] ~ values[2]+(values[3]-values[2])/2,
                          paciHost2005 == values[4] ~ values[3]+(values[4]-values[3])/2,
                          paciHost2005 == values[5] ~ values[4]+(values[5]-values[4])/2,
                          TRUE ~ NA_real_),
         ci.lo90 = att - qnorm(0.95)*se,
         ci.hi90 = att + qnorm(0.95)*se,
         ci.lo99 = att - qnorm(0.995)*se,
         ci.hi99 = att + qnorm(0.995)*se) %>%
  ggplot() + 
  geom_hline(yintercept = 0 + shift, size = 0.2, col = I("grey")) +
  geom_point(mapping = aes(x = paci, y = att + shift)) + 
  geom_pointrange(aes(x = paci, y = att + shift, ymin = ci.lo + shift, ymax = ci.hi + shift),
                  fatten = 2, size = 1) +
  geom_text(aes(x = paci + 0.6, y = att + 0.2 + shift, 
                label = paste0("N = ", as.character(n))),
            size = 3, family = "Times New Roman") +
  ylab("Estimated ATT and 95% confidence intervals") + xlab("Host PACI") +
  geom_histogram(data = unctad %>%
                   filter(ddyad %in% c(out1_3bin$id, out2_3bin$id, out3_3bin$id)),
                 aes(x = paciHost2005, y = enlarger*((after_stat(count))/sum(after_stat(count))))) +
  scale_y_continuous(labels = function(x) x - shift, breaks = c(shift-1, shift-.5, shift, shift+.5))
dev.copy("plots/supp_fig_h1.pdf", device = pdf, width = 5, height = 5)
dev.off()

#### 4 bins ----
values <- quantile(unctad$paciHost2005, probs = seq(0, 1, 0.25), na.rm = T)
values <- values[values != 0]


##### Bin 1 ----
out1_4bin <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "fdigdpHost", 
                          "gdppcHost", "tradeHost", "pcHost", "lgdpHost", "ljiHost"), 
                    data = unctad %>% filter(paciHost2005 >= 0 & paciHost2005 <= values[1]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Bin 2 ----
out2_4bin <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "fdigdpHost", 
                          "gdppcHost", "tradeHost", "pcHost", "lgdpHost", "ljiHost"), 
                    data = unctad %>% filter(paciHost2005 >= values[1] & paciHost2005 <= values[2]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Bin 3 ----
out3_4bin <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "fdigdpHost", 
                          "gdppcHost", "tradeHost", "pcHost", "lgdpHost", "ljiHost"), 
                    data = unctad %>% filter(paciHost2005 >= values[2] & paciHost2005 <= values[3]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Bin 4 ----
out4_4bin <- gsynth(logflowsAtoB ~ ratification,
                    X = c("gdpgrowHome", "gdppcHome", "lgdpHome", "ljiHome", "fdigdpHost", 
                          "gdppcHost", "tradeHost", "pcHost", "lgdpHost", "ljiHost"), 
                    data = unctad %>% filter(paciHost2005 >= values[3] & paciHost2005 <= values[4]), 
                    index = c("ddyad","year"), 
                    force = "two-way", 
                    na.rm = T,
                    min.T0 = 5,
                    CV = TRUE, 
                    r = c(0, 3), 
                    se = TRUE, 
                    inference = "parametric", 
                    nboots = 1000, 
                    parallel = TRUE,
                    EM = TRUE)

##### Figure H.2 ----
estimates <- data.frame(
  bin = 1:4,
  att = rep(NA, 4),
  se = rep(NA, 4),
  ci.lo = rep(NA, 4),
  ci.hi = rep(NA, 4),
  n = rep(NA, 4)
)

estimates$att[estimates$bin == 1] <- out1_4bin$att.avg
estimates$se[estimates$bin == 1] <- out1_4bin$est.avg[2]
estimates$ci.lo[estimates$bin == 1] <- out1_4bin$est.avg[3]
estimates$ci.hi[estimates$bin == 1] <- out1_4bin$est.avg[4]
estimates$n[estimates$bin == 1] <- out1_4bin$N

estimates$att[estimates$bin == 2] <- out2_4bin$att.avg
estimates$se[estimates$bin == 2] <- out2_4bin$est.avg[2]
estimates$ci.lo[estimates$bin == 2] <- out2_4bin$est.avg[3]
estimates$ci.hi[estimates$bin == 2] <- out2_4bin$est.avg[4]
estimates$n[estimates$bin == 2] <- out2_4bin$N

estimates$att[estimates$bin == 3] <- out3_4bin$att.avg
estimates$se[estimates$bin == 3] <- out3_4bin$est.avg[2]
estimates$ci.lo[estimates$bin == 3] <- out3_4bin$est.avg[3]
estimates$ci.hi[estimates$bin == 3] <- out3_4bin$est.avg[4]
estimates$n[estimates$bin == 3] <- out3_4bin$N

estimates$att[estimates$bin == 4] <- out4_4bin$att.avg
estimates$se[estimates$bin == 4] <- out4_4bin$est.avg[2]
estimates$ci.lo[estimates$bin == 4] <- out4_4bin$est.avg[3]
estimates$ci.hi[estimates$bin == 4] <- out4_4bin$est.avg[4]
estimates$n[estimates$bin == 4] <- out4_4bin$N

shift <- 1.5 # shift estimates up
enlarger <- 4 # factor to multiply histogram and make it larger
estimates %>%
  mutate(paciHost2005 = values,
         paci = case_when(paciHost2005 == values[1] ~ 0+(values[1]-0)/2,
                          paciHost2005 == values[2] ~ values[1]+(values[2]-values[1])/2,
                          paciHost2005 == values[3] ~ values[2]+(values[3]-values[2])/2,
                          paciHost2005 == values[4] ~ values[3]+(values[4]-values[3])/2,
                          paciHost2005 == values[5] ~ values[4]+(values[5]-values[4])/2,
                          TRUE ~ NA_real_),
         ci.lo90 = att - qnorm(0.95)*se,
         ci.hi90 = att + qnorm(0.95)*se,
         ci.lo99 = att - qnorm(0.995)*se,
         ci.hi99 = att + qnorm(0.995)*se) %>%
  ggplot() + 
  geom_hline(yintercept = 0 + shift, size = 0.2, col = I("grey")) +
  geom_point(mapping = aes(x = paci, y = att + shift)) + 
  geom_pointrange(aes(x = paci, y = att + shift, ymin = ci.lo + shift, ymax = ci.hi + shift),
                  fatten = 2, size = 1) +
  geom_text(aes(x = paci + 0.6, y = att + 0.2 + shift, 
                label = paste0("N = ", as.character(n))),
            size = 3, family = "Times New Roman") +
  ylab("Estimated ATT and 95% confidence intervals") + xlab("Host PACI") +
  geom_histogram(data = unctad %>% 
                   filter(ddyad %in% c(out1_4bin$id, out2_4bin$id,
                                       out3_4bin$id, out4_4bin$id)),
                 aes(x = paciHost2005, y = enlarger*((after_stat(count))/sum(after_stat(count))))) +
  scale_y_continuous(labels = function(x) x - shift, breaks = c(shift-1, shift-.5, shift, shift+.5))
dev.copy("plots/supp_fig_h2.pdf", device = pdf, width = 5, height = 5)
dev.off()

### 2FE (with and without buffer) ----
values <- quantile(unctad$paciHost2005, probs = seq(0, 1, 0.20), na.rm = T)
values <- values[values != 0]

estimates <- data.frame(
  interval = unname(values),
  estimate = rep(NA, length(values)),
  se = rep(NA, length(values)),
  nobs = rep(NA, length(values)),
  n = rep(NA, length(values)),
  low99 = rep(NA, length(values)),
  hi99 = rep(NA, length(values)),
  low95 = rep(NA, length(values)),
  hi95 = rep(NA, length(values)),
  low90 = rep(NA, length(values)),
  hi90 = rep(NA, length(values)),
  model = c(rep("(a) No buffer, no controls", length(values)),
            rep("(b) No buffer, all controls", length(values)),
            rep("(c) Buffer, no controls", length(values)),
            rep("(d) Buffer, all controls", length(values)))
)

# set bar object
bar <- txtProgressBar(min = 1,
                      max = nrow(estimates),
                      initial = 1,
                      char = "=",
                      width = 50,
                      style = 3)

for (i in 1:nrow(estimates)) {
  
  v <- estimates$interval[i]
  
  if (v == values[1]){
    v1 <- -0.1 # fake value below 0 to exclude from the interval
    v2 <- v
  } else {
    v1 <- estimates$interval[i-1]
    v2 <- v
  }
  
  if (estimates$model[i] == "(a) No buffer, no controls"){
    mod <- feols(logflowsAtoB ~ ratification | ddyad + year,
                 data = unctad %>% filter(paciHost2005 > v1 & paciHost2005 <= v2), 
                 cluster = ~ddyad,
                 warn = FALSE, 
                 notes = FALSE
    )
    estimates$n[i] <- unctad %>% filter(paciHost2005 > v1 & paciHost2005 <= v2) %>%
      ungroup() %>%
      select(ddyad, year, logflowsAtoB, ratification) %>%
      .[complete.cases(.),] %>%
      select(ddyad) %>% simplify() %>% unique() %>% length()
  } else if (estimates$model[i] == "(b) No buffer, all controls"){
    mod <- feols(logflowsAtoB ~ ratification +
                   (l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                      l.demHost + l.lgdpHost + l.ljiHost +
                      gdppcHome + gdpgrowHome + ljiHome + 
                      comlang + colony + bit + dist)*as.factor(year) | ddyad + year,
                 data = unctad %>% filter(paciHost2005 > v1 & paciHost2005 <= v2), 
                 cluster = ~ddyad,
                 warn = FALSE, 
                 notes = FALSE)
    estimates$n[i] <- unctad %>% filter(paciHost2005 > v1 & paciHost2005 <= v2) %>%
      ungroup() %>%
      select(ddyad, year, logflowsAtoB, ratification,
             l.fdigdpHost, l.gdppcHost, l.gdppcHost, l.tradeHost, 
             l.pcHost, l.demHost, l.lgdpHost, l.ljiHost, gdppcHome, 
             gdpgrowHome, ljiHome, comlang, colony, bit, dist) %>%
      .[complete.cases(.),] %>%
      select(ddyad) %>% simplify() %>% unique() %>% length()
  } else if (estimates$model[i] == "(c) Buffer, no controls"){
    unctad.dim <- unctad %>% filter(year != 1999 & year != 2000 & year != 2001) %>%
      filter(isocodeHome != "IE" & isocodeHome != "EE")
    mod <- feols(logflowsAtoB ~ ratification | ddyad + year,
                 data = unctad.dim %>% filter(paciHost2005 > v1 & paciHost2005 <= v2), 
                 cluster = ~ddyad,
                 warn = FALSE, 
                 notes = FALSE)
    estimates$n[i] <- unctad.dim %>% filter(paciHost2005 > v1 & paciHost2005 <= v2) %>%
      ungroup() %>%
      select(ddyad, year, logflowsAtoB, ratification) %>%
      .[complete.cases(.),] %>%
      select(ddyad) %>% simplify() %>% unique() %>% length()
  } else if (estimates$model[i] == "(d) Buffer, all controls"){
    unctad.dim <- unctad %>% filter(year != 1999 & year != 2000 & year != 2001) %>%
      filter(isocodeHome != "IE" & isocodeHome != "EE")
    mod <- feols(logflowsAtoB ~ ratification +
                   (l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                      l.demHost + l.lgdpHost + l.ljiHost +
                      gdppcHome + gdpgrowHome + ljiHome + 
                      comlang + colony + bit + dist)*as.factor(year) | ddyad + year,
                 data = unctad.dim %>% filter(paciHost2005 > v1 & paciHost2005 <= v2), 
                 cluster = ~ddyad,
                 warn = FALSE, 
                 notes = FALSE)
    estimates$n[i] <- unctad.dim %>% filter(paciHost2005 > v1 & paciHost2005 <= v2) %>%
      ungroup() %>%
      select(ddyad, year, logflowsAtoB, ratification,
             l.fdigdpHost, l.gdppcHost, l.gdppcHost, l.tradeHost, 
             l.pcHost, l.demHost, l.lgdpHost, l.ljiHost, gdppcHome, 
             gdpgrowHome, ljiHome, comlang, colony, bit, dist) %>%
      .[complete.cases(.),] %>%
      select(ddyad) %>% simplify() %>% unique() %>% length()
  }
  
  estimates$estimate[i] <- mod$coefficients["ratification"]
  estimates$nobs[i] <- nobs(mod)
  estimates$se[i] <- summary(mod)$se['ratification']
  
  cval99 <- qt(0.995, df = estimates$nobs[i]-1)
  cval95 <- qt(0.975, df = estimates$nobs[i]-1)
  cval90 <- qt(0.95, df = estimates$nobs[i]-1)
  
  estimates$low99[i] <- estimates$estimate[i] - cval99*estimates$se[i]
  estimates$hi99[i] <- estimates$estimate[i] + cval99*estimates$se[i]
  
  estimates$low95[i] <- estimates$estimate[i] - cval95*estimates$se[i]
  estimates$hi95[i] <- estimates$estimate[i] + cval95*estimates$se[i]
  
  estimates$low90[i] <- estimates$estimate[i] - cval90*estimates$se[i]
  estimates$hi90[i] <- estimates$estimate[i] + cval90*estimates$se[i]
  
  setTxtProgressBar(bar, i)
}

#### Figure I.1 ----
# position point estimates in the middle of each used interval of paciHost
estimates <- estimates %>%
  mutate(paci = case_when(interval == values[1] ~ 0+(values[1]-0)/2,
                          interval == values[2] ~ values[1]+(values[2]-values[1])/2,
                          interval == values[3] ~ values[2]+(values[3]-values[2])/2,
                          interval == values[4] ~ values[3]+(values[4]-values[3])/2,
                          interval == values[5] ~ values[4]+(values[5]-values[4])/2, 
                          TRUE ~ NA_real_))


# prepare df for distribution of the variable:
facet_df <- unctad %>%
  ungroup() %>%
  select(ddyad, year, logflowsAtoB, ratification, paciHost2005) %>%
  .[complete.cases(.),] %>%
  select(paciHost2005) %>%
  mutate(model = "(a) No buffer, no controls") %>%
  rbind(unctad %>%
          ungroup() %>%
          select(ddyad, year, logflowsAtoB, ratification,
                 l.fdigdpHost, l.gdppcHost, l.gdppcHost, l.tradeHost, 
                 l.pcHost, l.demHost, l.lgdpHost, l.ljiHost, gdppcHome, 
                 gdpgrowHome, ljiHome, comlang, colony, bit, dist,
                 paciHost2005) %>%
          .[complete.cases(.),] %>%
          select(paciHost2005) %>%
          mutate(model = "(b) No buffer, all controls")) %>%
  rbind(unctad %>%
          ungroup() %>%
          filter(year != 1999 & year != 2000 & year != 2001) %>%
          filter(isocodeHome != "IE" & isocodeHome != "EE") %>%
          select(ddyad, year, logflowsAtoB, ratification, paciHost2005) %>%
          .[complete.cases(.),] %>%
          select(paciHost2005) %>%
          mutate(model = "(c) Buffer, no controls")) %>%
  rbind(unctad %>%
          ungroup() %>%
          filter(year != 1999 & year != 2000 & year != 2001) %>%
          filter(isocodeHome != "IE" & isocodeHome != "EE") %>%
          select(ddyad, year, logflowsAtoB, ratification,
                 l.fdigdpHost, l.gdppcHost, l.gdppcHost, l.tradeHost, 
                 l.pcHost, l.demHost, l.lgdpHost, l.ljiHost, gdppcHome, 
                 gdpgrowHome, ljiHome, comlang, colony, bit, dist,
                 paciHost2005) %>%
          .[complete.cases(.),] %>%
          select(paciHost2005) %>%
          mutate(model = "(d) Buffer, all controls"))

shift <- 1.5 # shift estimates +2 up
enlarger <- 8 # factor to multiply histogram and make it larger
p_top <- estimates %>% 
  filter(model == "(a) No buffer, no controls" | model == "(b) No buffer, all controls") %>%
  ggplot() + 
  geom_hline(yintercept = 0 + shift, size = 0.2, col = I("grey")) +
  geom_point(mapping = aes(x = paci, y = estimate + shift)) + 
  geom_pointrange(aes(x = paci, y = estimate + shift, ymin = low95 + shift, ymax = hi95 + shift),
                  fatten = 2, size = 1) +
  geom_text(aes(x = paci + 0.7, y = estimate + 0.4 + shift, 
                label = paste0("N = ", as.character(n))),
            size = 2.5, family = "Times New Roman") +
  ylab("") + xlab("") +
  geom_histogram(data = facet_df %>%
                   filter(model %in% c('(a) No buffer, no controls', '(b) No buffer, all controls')),
                 aes(x = paciHost2005, y = enlarger*((after_stat(count))/sum(after_stat(count))))) +
  facet_rep_wrap("model", repeat.tick.labels = 'all') +
  scale_y_continuous(labels = function(x) x - shift, breaks = c(shift-1.5, shift-1, shift-.5, shift, shift+.5, shift+1)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        strip.background = element_rect(colour = NA, size = 0, fill = 'transparent'),
        strip.placement = "outside",
        text = element_text(colour = "black", size = 12, family = "Times New Roman"))
shift2 <- 3.5 # shift estimates +2 up
enlarger2 <- 25 # factor to multiply histogram and make it larger
p_bot <- estimates %>% 
  filter(model != "(a) No buffer, no controls" & model != "(b) No buffer, all controls") %>%
  ggplot() + 
  geom_hline(yintercept = 0 + shift2, size = 0.2, col = I("grey")) +
  geom_point(mapping = aes(x = paci, y = estimate + shift2)) + 
  geom_pointrange(aes(x = paci, y = estimate + shift2, ymin = low95 + shift2, ymax = hi95 + shift2),
                  fatten = 2, size = 1) +
  geom_text(aes(x = paci + 0.7, y = estimate + 0.6 + shift2, 
                label = paste0("N = ", as.character(n))),
            size = 2.5, family = "Times New Roman") +
  ylab("") + xlab("") +
  geom_histogram(data = facet_df %>%
                   filter(model %in% c('(c) Buffer, no controls', '(d) Buffer, all controls')),
                 aes(x = paciHost2005, y = enlarger2*((after_stat(count))/sum(after_stat(count))))) +
  facet_rep_wrap("model", repeat.tick.labels = 'all') +
  scale_y_continuous(labels = function(x) x - shift2, breaks = c(shift2-3, shift2-1.5, shift2, shift2+1.5, shift2+3)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent", color = NA),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.key = element_rect(fill = "transparent", color = NA),
        axis.line = element_line(colour = "black", size = 0.3),
        strip.background = element_rect(colour = NA, size = 0, fill = 'transparent'),
        strip.placement = "outside",
        text = element_text(colour = "black", size = 12, family = "Times New Roman"))

plot <- plot_grid(p_top, p_bot, align = 'v', vjust = 1, scale = 1, nrow = 2)

# create common x and y labels
y.grob <- textGrob("Estimated ATT and 95% confidence intervals", 
                   gp = gpar(fontsize = 12, fontfamily = 'Times New Roman'), rot = 90)
x.grob <- textGrob("Host PACI", 
                   gp = gpar(fontsize = 12, fontfamily = 'Times New Roman'), rot = 0)

# add to plot:
grid.arrange(arrangeGrob(plot, left = y.grob, bottom = x.grob))
dev.copy("plots/supp_fig_i1.pdf", device = pdf, width = 7, height = 7)
dev.off()

## Interactive models ----
### OLS model with 2FE (dyad-fixed effects) ----
mod1 <- feols(logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) | ddyad + year,
              data = unctad,
              cluster = ~isocodeHome)
summary(mod1)

mod2 <- feols(logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
                l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                l.demHost + l.lgdpHost + l.ljiHost | ddyad + year,
              data = unctad,
              cluster = ~isocodeHome)
summary(mod2)

mod3 <- feols(logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
                l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                l.demHost + l.lgdpHost + l.ljiHost + gdppcHome + 
                gdpgrowHome + ljiHome | ddyad + year, 
              data = unctad,
              cluster = ~isocodeHome)
summary(mod3)

mod4 <- feols(logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
                l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                l.demHost + l.lgdpHost + l.ljiHost + gdppcHome + 
                gdpgrowHome + ljiHome + comlang + colony + bit + dist | ddyad + year, 
              data = unctad,
              cluster = ~isocodeHome)
summary(mod4)

#### Table J.1 ----
cf_map <- c('ratification:I(paciHost2005 * paciHost2005)' = "OECD Convention X Host PACI sq.", 
            'ratification:paciHost2005' = "OECD Convention X Host PACI", 
            'ratification' = "OECD Convention",
            'l.fdigdpHost' = "Lag Host FDI (GDP perc.)", 'l.gdppcHost' = "Lag Host GDP per capita", 
            'l.tradeHost' = "Lag Host Trade (GDP perc.)", 'l.pcHost' = "Lag Host POLCON III", 
            'l.demHost' = "Lag Host Democracy",
            'l.lgdpHost' = "Lag Host GDP (log)", 'l.ljiHost' = "Lag Host Judiciary Indep.", 
            'gdppcHome' = "Home GDP per capita",
            'gdpgrowHome' = "Home GDP Growth (perc.)", 'ljiHome' = "Home Judiciary Indep.", 'bit' = "Dyad BIT")

rows <- tribble(~term,         ~mod1, ~mod2, ~mod3, ~mod4, 
                "Dyad FE",     "Yes", "Yes", "Yes", "Yes",                
                "Year FE",     "Yes", "Yes", "Yes", "Yes")
attr(rows, "position") <- c(29,30)

modelsummary(list(mod1, mod2, mod3, mod4),
             stars = TRUE,
             coef_map = cf_map,
             gof_omit = "AIC|BIC|Log.Lik.|Std.Errors|RMSE|R2 Within|R2 Adj.|FE",
             add_rows = rows,
             # output = "tables/supp_tab_j1.tex",
             title = "Dyadic country-level data. Twoway Fixed-Effect Models. Full disclosure")


### MLE (Random effect models) ----
mod1 <- lmer(logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
               (1|isocodeHome:year) + (1|isocodeHost:year) + (1|ddyad),
             data = unctad)
summary(mod1)

mod2 <- lmer(logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
               l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
               l.demHost + l.lgdpHost + l.ljiHost + 
               (1|isocodeHome:year) + (1|isocodeHost:year) + (1|ddyad),
             data = unctad)
summary(mod2)

mod3 <- lmer(logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
               l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
               l.demHost + l.lgdpHost + l.ljiHost + gdppcHome + 
               gdpgrowHome + ljiHome +
               (1|isocodeHome:year) + (1|isocodeHost:year) + (1|ddyad),
             data = unctad)
summary(mod3)

mod4 <- lmer(logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
               l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
               l.demHost + l.lgdpHost + l.ljiHost + gdppcHome + 
               gdpgrowHome + ljiHome +
               bit + comlang + colony + dist +
               (1|isocodeHome:year) + (1|isocodeHost:year) + (1|ddyad),
             data = unctad)
summary(mod4)

#### Table J.2 ----
cf_map <- c('ratificationI(paciHost2005 * paciHost2005)' = "OECD Convention X Host PACI sq.", 
            'ratificationpaciHost2005' = "OECD Convention X Host PACI", 
            'ratification' = "OECD Convention",
            'I(paciHost2005 * paciHost2005)' = 'Host PACI sq.',
            'paciHost2005' = 'Host PACI',
            'l.fdigdpHost' = "Lag Host FDI (GDP perc.)", 'l.gdppcHost' = "Lag Host GDP per capita", 
            'l.tradeHost' = "Lag Host Trade (GDP perc.)", 'l.pcHost' = "Lag Host POLCON III", 
            'l.demHost' = "Lag Host Democracy",
            'l.lgdpHost' = "Lag Host GDP (log)", 'l.ljiHost' = "Lag Host Judiciary Indep.", 
            'gdppcHome' = "Home GDP per capita",
            'gdpgrowHome' = "Home GDP Growth (perc.)", 'ljiHome' = "Home Judiciary Indep.", 'bit' = "Dyad BIT",
            'comlang' = 'Dyad Common Language', 'colony' = 'Dyad Colonial Relation',
            'dist' = 'Dyad distance',
            '(Intercept)' = 'Constant')

rows <- tribble(~term,                            ~mod1, ~mod2, ~mod3, ~mod4, 
                "Home-Host, year intercepts",     "Yes", "Yes", "Yes", "Yes",                
                "Dyad intercepts",                "Yes", "Yes", "Yes", "Yes",
                "Log Likelihood", 
                summary(mod1)$log %>% as.numeric() %>% round(digits = 1) %>% as.character(), 
                summary(mod2)$log %>% as.numeric() %>% round(digits = 1) %>% as.character(), 
                summary(mod3)$log %>% as.numeric() %>% round(digits = 1) %>% as.character(), 
                summary(mod4)$log %>% as.numeric() %>% round(digits = 1) %>% as.character())
attr(rows, "position") <- c(41,42, 44)

modelsummary(list("(1)" = mod1, 
                  "(2)" = mod2, 
                  "(3)" = mod3, 
                  "(4)" = mod4),
             stars = TRUE,
             coef_map = cf_map,
             gof_omit = "BIC|ICC|RMSE|R2 Marg.|R2 Cond.",
             add_rows = rows,
             title = "Dyadic country-level data. Multilevel models",
             output = "tables/supp_tab_j2.tex"
             )


### Selection models ----
# using paciHost
mod1 <- selection(selection = flowsBinary ~ ratification*paciHost2005 + ratification*I(paciHost2005*paciHost2005),
                  outcome = logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005), 
                  data = unctad,
                  method = "2step")
summary(mod1)

mod2 <- selection(selection = flowsBinary ~ ratification*paciHost2005 + ratification*I(paciHost2005*paciHost2005) + 
                    l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                    l.demHost + l.lgdpHost + l.ljiHost,
                  outcome = logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
                    l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                    l.demHost + l.lgdpHost + l.ljiHost, 
                  data = unctad,
                  method = "2step")
summary(mod2)

mod3 <- selection(selection = flowsBinary ~ ratification*paciHost2005 + ratification*I(paciHost2005*paciHost2005) + 
                    l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                    l.demHost + l.lgdpHost + l.ljiHost + gdppcHome + gdpgrowHome + ljiHome,
                  outcome = logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
                    l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                    l.demHost + l.lgdpHost + l.ljiHost + gdppcHome + gdpgrowHome + ljiHome, 
                  data = unctad,
                  method = "2step")
summary(mod3)

mod4 <- selection(selection = flowsBinary ~ ratification*paciHost2005 + ratification*I(paciHost2005*paciHost2005) + 
                    l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                    l.demHost + l.lgdpHost + l.ljiHost + comlang + colony + bit + gdppcHome + gdpgrowHome + ljiHome,
                  outcome = logflowsAtoB ~ ratification*paciHost2005 + ratification*I(paciHost2005 * paciHost2005) +
                    l.fdigdpHost + l.gdppcHost + l.gdppcHost + l.tradeHost + l.pcHost +
                    l.demHost + l.lgdpHost + l.ljiHost + comlang + colony + bit + gdppcHome + gdpgrowHome + ljiHome, 
                  data = unctad,
                  method = "2step")
summary(mod4)

#### Table J.3 ----
texreg(list(mod1, mod2, mod3, mod4),
       custom.coef.names = c("S: Constant", "S: OECD Convention", "S: Host PACI", "S: Host PACI sq.",
                             "S: OECD Convention X Host PACI", 
                             "S: OECD Convention X Host PACI sq.",
                             "O: Constant", "O: OECD Convention", "O: Host PACI", "O: Host PACI sq.",
                             "O: OECD Convention X Host PACI", 
                             "O: OECD Convention X Host PACI sq.", 
                             "Inverse Mills Ratio", "Sigma", "Rho",
                             "S: Lag Host FDI (GDP perc.)", "S: Lag Host GDP per capita", 
                             "S: Lag Host Trade (GDP perc.)", "S: Lag Host POLCON III", "S: Lag Host Democracy",
                             "S: Lag Host GDP (log)", "S: Lag Host Judiciary Indep.", 
                             "O: Lag Host FDI (GDP perc.)", "O: Lag Host GDP per capita", 
                             "O: Lag Host Trade (GDP perc.)", "O: Lag Host POLCON III", "O: Lag Host Democracy",
                             "O: Lag Host GDP (log)", "O: Lag Host Judiciary Indep.", 
                             "S: Home GDP per capita",
                             "S: Home GDP Growth (perc.)", "S: Home Judiciary Indep.", 
                             "O: Home GDP per capita",
                             "O: Home GDP Growth (perc.)", "O: Home Judiciary Indep.", 
                             "S: Dyad Common Language",
                             "S: Dyad Colonial Relation", "S: Dyad BIT",
                             "O: Dyad Common Language",
                             "O: Dyad Colonial Relation", "O: Dyad BIT"),
       reorder.coef = c(6,5,2,4,3,16:22,30:32,36:38,1,
                        12,11,8,10,9,23:29,33:35,39:41,7,13:15),
       stars = c(0.001, 0.01, 0.05, 0.1), include.adj = FALSE,
       symbol = c("+"),
       caption = "Dyadic country-level data. Heckman selection models") %>% 
  write_file("tables/supp_tab_j3.tex")

#====# The End #====#